In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
In [2]:
# Load the raw insurance dataset and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='insurance.csv')
df.head(n=5)
Out[2]:
age sex bmi children smoker region charges
0 19 female 27.900 0 yes southwest 16884.92400
1 18 male 33.770 1 no southeast 1725.55230
2 28 male 33.000 3 no southeast 4449.46200
3 33 male 22.705 0 no northwest 21984.47061
4 32 male 28.880 0 no northwest 3866.85520
In [3]:
# Number of rows per region (bracket access avoids any clash with DataFrame attributes).
df['region'].value_counts()
Out[3]:
southeast    364
southwest    325
northwest    325
northeast    324
Name: region, dtype: int64
In [4]:
# (rows, columns) of the insurance frame loaded above.
df.shape
Out[4]:
(1338, 7)
In [5]:
# Load the saved results table (it carries ActualValue and PredictedValue
# columns) and preview its first five rows.
results = pd.read_csv(filepath_or_buffer='results.csv')
results.head(n=5)
Out[5]:
Age Sex Bmi Children Smoker Region ActualValue PredictedValue
0 31.0 -1.0 25.740 0.0 1.0 -1.0 3756.62160 4082.469330
1 25.0 1.0 26.220 0.0 1.0 0.5 2721.32080 3014.207023
2 23.0 1.0 34.400 0.0 1.0 -0.5 1826.84300 4813.924792
3 27.0 1.0 42.130 0.0 -1.0 -1.0 39611.75770 31416.639078
4 60.0 -1.0 36.005 0.0 1.0 0.5 13228.84695 15080.698229
In [6]:
# Predicted vs. actual scatter: points lying on the y = x reference line are
# perfect predictions; vertical distance from the line is the prediction error.
fig = px.scatter(
    results,
    x='ActualValue',
    y='PredictedValue',
    title='Predicted vs. actual values',
)
# Span the identity line over BOTH columns so it is not cut short when a
# prediction falls outside the range of the actual values.
lo = min(results.ActualValue.min(), results.PredictedValue.min())
hi = max(results.ActualValue.max(), results.PredictedValue.max())
# A straight line only needs its two endpoints. With so few points plotly would
# default to drawing markers as well, so the mode is set explicitly.
x = np.linspace(lo, hi, num=2)
fig.add_scatter(x=x, y=x, mode='lines', name='y=x')
fig.show()